getwd()
## [1] "/Users/damodarpai/Documents/Labratories/Lab 8"
?apply
a = 0
b=5
runner = runif(10,a,b)
runner
## [1] 4.3536633 1.7231909 4.7865070 0.6749843 0.7641723 4.7109514 0.7483980
## [8] 2.0880121 2.6686000 4.3048372
#population
mu = (a+b)/2
sigmaSq = ((b-a)^2)/12
mu
## [1] 2.5
sigmaSq
## [1] 2.083333
#sample
xbar = mean(runner)
sampleVar = var(runner)
xbar
## [1] 2.682332
sampleVar
## [1] 2.965662
T = Y₁ + … + Yₙ Ȳ = T/n = (Y₁ + … + Yₙ)/n
E(Ȳ) = E((Y₁ + … + Yₙ)/n) = (nE(Yᵢ))/n = E(Yᵢ)
E(T) = nE(Yᵢ)
V(Ȳ) = V(T/n) = (1/n²)V(T) = (nV(Yᵢ))/n² = V(Yᵢ)/n
V(T) = nV(Yᵢ)
myclt=function(n,iter,a=0,b=5){
y=runif(n*iter,a,b)
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
sm=apply(data,2,sum)
h=hist(sm,plot=FALSE)
hist(sm,col=rainbow(length(h$mids)),freq=FALSE,main="Distribution of the sum of uniforms")
curve(dnorm(x,mean=n*(a+b)/2,sd=sqrt(n*(b-a)^2/12)),add=TRUE,lwd=2,col="Blue")
sm
}
w = myclt(10,10000)
## A: runif gives us n*iter number of variable values within the
distribution between a and b of the given values. ## B: The data
variable contains the matrix with columns being the number of iterations
we ran the run if statement and the rows being the number of variables
we have in each iterations. ## C: sm applies the sum function to all of
the data with a margin of 2. ## D: The w holds the historgram of the
data that we’ve taken the sum of.
sum(w)
## [1] 250110.2
var(w)
## [1] 20.486
mycltu =function(n,iter,a=0,b=5){
y=runif(n*iter,a,b)
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
mn=apply(data,2,mean)
h=hist(mn,plot=FALSE)
hist(mn,col=rainbow(length(h$mids)),freq=FALSE,main="Distribution of the sum of uniforms")
curve(dnorm(x,mean=n*(a+b)/2,sd=sqrt(n*(b-a)^2/12)),add=TRUE,lwd=2,col="Blue")
mn
}
fnMean = myclt(10,10000)
mean(fnMean)
## [1] 24.97528
var(fnMean)
## [1] 20.75384
mycltu=function(n,iter,a=0,b=10){
## r-random sample from the uniform
y=runif(n*iter,a,b)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
hist(w,freq=FALSE, ylim=c(0,ymax), main=paste("Histogram of sample mean",
"\n", "sample size= ",n,sep=""),xlab="Sample mean")
## add a density curve made from the sample distribution
lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
## Add the density from which the samples were taken
curve(dunif(x,a,b),add=TRUE,lwd=4)
}
mycltu(1,10000,0,10)
## As we can see here and stated in the video, the distribution seems to
take a triangle shape that is almost normal.
mycltu(2,10000,0,10)
mycltu(3,10000,0,10)
mycltu(5,10000,0,10)
mycltu(10,10000,0,10)
mycltu(30,10000,0,10)
## My conclusion from the above graphs is that as the number of n
increases, the histogram of sample means represents a more normal graph
that is symmetric and follows a uniform distribution.This is
representative of what the Central Limit Theorem is about because at
n=30,we claim that the sample size is large enough to represent
normality given that all other criteria are met.
mycltb=function(n,iter,p=0.5,...){
## r-random sample from the Binomial
y=rbinom(n*iter,size=n,prob=p)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
## freq=FALSE means take a density
hist(w,freq=FALSE, ylim=c(0,ymax),
main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3)
}
mycltb(4,10000,0.3)
mycltb(5,10000,0.3)
mycltb(10,10000,0.3)
mycltb(20,10000,0.3)
mycltb(4,10000,0.7)
mycltb(5,10000,0.7)
mycltb(10,10000,0.7)
mycltb(20,10000,0.7)
mycltb(4,10000,0.5)
mycltb(5,10000,0.5)
mycltb(10,10000,0.5)
mycltb(20,10000,0.5)
mycltp=function(n,iter,lambda=10,...){
## r-random sample from the Poisson
y=rpois(n*iter,lambda=lambda)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Make a suitable layout for graphing
layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE))
## Now we can make the histogram
hist(w,freq=FALSE, ylim=c(0,ymax), col=rainbow(max(w)),
main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
# Now make a new plot
# Since y is discrete we should use a barplot
barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" )
x=0:max(y)
plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)),
main="Probability function for Poisson", ylab="Probability",xlab="y")
}
mycltp(2,10000,4)
mycltp(3,10000,4)
mycltp(5,10000,4)
mycltp(10,10000,4)
mycltp(20,10000,4)
mycltp(2,10000,10)
mycltp(3,10000,10)
mycltp(5,10000,10)
mycltp(10,10000,10)
mycltp(20,10000,10)
# Task 6
MATH4753DPAI24::mycltp(4,10000,10)